#!/usr/bin/env python3
# -*- coding: utf-8 -*-
##############################################
# @Author: DengLibin 榆霖
# @Date: Create in 2022-03-08 13:15:14
# @Description: Douban web page scraping
##############################################
'Extract movie data from a saved Douban Top 250 HTML page into a CSV file.'
__author__ = 'DengLibin'

import csv
import re

import requests

# Address of the Douban Top 250 movie listing page.
url = "https://movie.douban.com/top250"


##############################################
# @Author: DengLibin 榆霖
# @Date: Create in 2022-03-08 13:16:06
# @Description: Scrape the Douban Top 250 page (parses a locally saved copy into a CSV file)
##############################################
def run(html_path='top250.html', csv_path='data.csv'):
    """Extract movie records from a saved Douban Top 250 page into a CSV file.

    Reads the HTML stored at *html_path*, uses a regular expression to pull
    out each movie's title, year, average score and number of ratings,
    prints every record, and writes it as one CSV row in the column order
    name, year, score, num. No header row is written.

    Args:
        html_path: Path of the UTF-8 encoded HTML file to parse. Defaults to
            'top250.html'.
        csv_path: Path of the CSV file to create or overwrite. Defaults to
            'data.csv'.

    Raises:
        OSError: If the HTML file cannot be opened or the CSV file cannot be
            written.
    """
    # NOTE: this function does not download anything. It expects a copy of
    # the page to have been saved beforehand at html_path (for example,
    # fetched from the Douban Top 250 URL with a browser-like User-Agent
    # header and written out as UTF-8).

    # re.S lets '.' match newlines, because a single movie entry spans many
    # lines of markup.
    movie_pattern = re.compile(
        r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)'
        r'</span>.*?<p class="">.*?<br>(?P<year>.*?)&nbsp.*?'
        r'<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
        r'.*?<span>(?P<num>.*?)人评价',
        re.S)

    # Read the whole page first so the input handle is closed before any
    # output is produced.
    with open(file=html_path, mode='r', encoding='utf-8') as f:
        page_content = f.read()

    # newline="" is what the csv module requires to avoid blank lines
    # between rows on Windows.
    with open(csv_path, mode='w', encoding='utf-8', newline="") as csv_file:
        csv_writer = csv.writer(csv_file)
        for match in movie_pattern.finditer(page_content):
            # Name the columns explicitly instead of relying on dict order.
            name, year, score, num = match.group('name', 'year', 'score', 'num')
            # The year group also captures the whitespace following <br>.
            row = (name, year.strip(), score, num)
            print(*row)
            csv_writer.writerow(row)

    print('完成')
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    run()
